
# delim ;  
* From here on every command and every star comment ends at a semicolon, not at the line break ;
clear all;
set matsize 11000 ;
eststo clear ;
* capture lets the script continue when the program or an open log does not exist ;
capture program drop add_lab_FE;
capture log close ; 
* Text log of this run. The replace option overwrites the log of any previous run ;
log using "Resurfacing_events.log", text replace;

* Outcome variables carried into the event-level panel ;
global var_outcomes
	iri
	sn_d
	urban
	aadt_10000
	pavement_thickness
;
* Additional segment-year variables, set aside in a temporary file and merged back later ;
global more_var
	Dcat_surface_cat surface_cat surface_type average_grade_seg
	pavement_section truck_route
	sh_h20_10m_s sh_urb_10m_s sh_elev_10m_s sh_sd_elev_10m_s sh_temp_10m_s sh_precip_10m_s
	multiple_id_1998_before
;
		
* Load the source panel. NOTE(review): the file name suggests HPMS highway statistics for 1988-2008 - confirm ;
use "../data/hpms_hwy_stats_88_08.dta", clear  ;

* Project do-files that are not shown here. Presumably they create I2_t and the derived variables kept below - confirm ;
do "_setup_expenditure.do";
do "_cleaning_and_new_variables_sample.do";



* Keep the identifiers, the indicator I2_t and the variables named in the two globals, then sort the panel ;
order irsstatecode state state_name id year I2_t ${var_outcomes} ${more_var};
keep  irsstatecode state state_name id year I2_t  ${var_outcomes} ${more_var};
sort id year ;

* Discard all observations before 1992 ;
drop if year<1992;
*----------------------------------------------------------------------------------;
* - Restrict the sample to segments that have resurfacing events ; 
*----------------------------------------------------------------------------------;
* maintained is the within-segment total of the indicator I2_t ;
bys id: egen maintained = total(I2_t) ;
tab maintained, missing ;
keep if maintained != 0 ;

*----------------------------------------------------------------------------------;
* - Create an event indicator ;
*----------------------------------------------------------------------------------;

* xx is 1 in the years where I2_t equals 1 and missing otherwise ;
gen xx=cond(I2_t==1,1,.);
* segment_event is the running count of events within a segment, so it is 0 before the first event ;
* The running sum is only meaningful in chronological order. Year is therefore put in the ;
* by-prefix sort list instead of relying on the sort order left behind by earlier commands ;
bys id (year): gen segment_event=sum(xx);

*mt -- it would be better to do this yourself.  Then the information on segment id and event would be encoded in the ID if we ever want it;
* This copy of id_event is removed by the keep further down and is rebuilt after the reshape ;
tostring segment_event, gen(yyy);
gen id_event= id+ ":" +yyy  ;
drop yyy;


*** This is a tricky step. For each of up to five events per segment, event time is defined
	on every row of the segment so the full information for that segment can be recovered ;
forvalues ev = 1/5 {;
	* Calendar year of event number ev, non-missing only on the event row itself ;
	gen evt_tmp = year*xx if segment_event==`ev' ;
	* Years in which the running event count equals ev ;
	gen spell_tmp = year if segment_event==`ev' ;

	* Spread the event year and the first and last year of the spell to all rows of the segment ;
	bys id: egen event_year`ev' = mean(evt_tmp) ;
	bys id: egen max_year`ev' = max(spell_tmp) ;
	bys id: egen min_year`ev' = min(spell_tmp) ;

	* Event time: negative before the event, zero in the event year ;
	gen time`ev' = year - event_year`ev' ;
	drop evt_tmp spell_tmp ;
};
sort id year ;
*----------------------------------------------------------------------------------;
*- Create a dataset at the event level with all the information of the segment ;
*----------------------------------------------------------------------------------;

* Last year in which each segment is observed. It is compared with max_year later to flag censored spells ;
bys id: egen last_year=max(year);

* Set the extra segment-year variables aside in temp_var. They are merged back on id and year after the reshape ;
preserve ; 
	keep irsstatecode id  year I2_t ${more_var} ;
	save "temp_var", replace ;
restore; 

* Keep the outcomes and the wide per-event variables with suffixes 1 to 5 that the reshape uses ;
keep irsstatecode id year   last_year I2_t ${var_outcomes} time* event_year* max_year* min_year*;

* In this step I create the data at the segment-event level. For each event
 I keep all the information of the segment. -- (tricky);
 
* Each segment-year row is expanded into one row per event number. The numeric suffix of ;
* time, event_year, max_year and min_year becomes the new variable segment_event ;
reshape long time event_year max_year min_year , i( irsstatecode id  year I2_t ${var_outcomes}  last_year ) j(segment_event );
sort  id segment_event time;
order  id segment_event time;

*generate id;
* id_event joins the segment id and the event number with a colon ;
tostring segment_event, gen(yyy);
gen id_event= id+ ":" +yyy  ;
drop  yyy;

sort id segment_event year ;

order id segment_event  id_event year;
* Event numbers that a segment never reaches have a missing event_year and hence missing time. Drop those rows ;
drop if time==.;


*add the additional variables I want to have; 

* Several event rows share one id-year row of temp_var, hence m:1. nogen suppresses the _merge variable ;
merge m:1 id year using  "temp_var", nogen ;
* Remove the temporary file that was saved before the reshape ;
erase "temp_var.dta";

*----------------------------------------------------------------------------------;
* - Derived variables on the data in long format  ;
*----------------------------------------------------------------------------------;

sort id segment_event year ;

* Years between the event and the last year of its spell. The spelling of the name is kept because later code uses it ;
gen lenght = max_year - event_year ;

* Surface category: codes 0, 44, 55, 65 and 66 are kept. Every other value, missing included, becomes 0 ;
gen D_surface = cond(inlist(Dcat_surface_cat, 0, 44, 55, 65, 66), Dcat_surface_cat, 0) ;

label define D_surface
	0  "Other"
	44 "Flexible"
	55 "Rigid"
	65 "Rig/Comp"
	66 "Composite"
;
label values D_surface D_surface ;

* censored is 1 when the spell runs up to the last year in which the segment is observed ;
gen censored = (max_year==last_year) ;
label define censored
	1 "Ends with last year of sample"
	0 "New resurfacing before last sample year"
;
label values censored censored ;

					  
* Running total of traffic from the event year onwards within each segment-event ;
* Under gen, sum() is a running sum, whereas egen would return the group total on every row ;
* The variable name aadt_10000 is written in full so the command does not depend on variable-name abbreviation ;
bys id_event (time): gen cumul_aadt=sum(aadt_10000) if time>=0 ;**!! DO NOT CHANGE IT TO EGEN;				  


* Period in which the event takes place. Rows with a missing event year get a missing period ;
gen period = cond(event_year<1992, 1,
			 cond(event_year<1996, 2,
			 cond(event_year<2000, 3,
			 cond(event_year<2005, 4, 5)))) if event_year != . ;

label define period
	1 "<92"
	2 "92-95"
	3 "96-99"
	4 "00-04"
	5 "05-08"
;
label values period period ;
tab year period ;

* Dummy for segments with multiple events ;
* The bys prefix also leaves the data sorted by id, which the by id commands below rely on ;
bys id: egen n_events_tmp = max(segment_event) ;
gen multiple_events = (n_events_tmp != 1) ;
tab multiple_events, missing ;
drop n_events_tmp ;

* Year of the following event on the same segment. It stays missing when no later event is recorded ;
gen next_event_year = . ;
forvalues k = 1/4 {;
	local nxt = `k' + 1 ;
	* Spread the year of event number nxt to every row of the segment ;
	by id: gen nxt_tmp = event_year if segment_event==`nxt' ;
	by id: egen nxt_year_tmp = mean(nxt_tmp) ;
	replace next_event_year = nxt_year_tmp if segment_event==`k' ;
	drop nxt_tmp nxt_year_tmp ;
};

* Years from this event to the next one ;
gen next_event_gap = next_event_year - event_year ;



* Declare the panel: one unit per segment-event, with event time as the time variable ;
sort id id_event time;
encode id_event, gen(id_event_code);
xtset id_event_code time ;


* Recorded zeros of sn_d are set to missing ;
* The name is written in full so the command does not depend on variable-name abbreviation ;
replace sn_d=. if sn_d==0;

tab period, m ; 




* One indicator variable per surface category, with names starting with Surf_ ;
tab D_surface, gen(Surf_);

* Changes around the event, built with the lag and lead operators of the xtset panel ;
* The labels describe the values at time 0. The variables themselves are defined on every row ;
gen D_aadt_10000=aadt_10000-l.aadt_10000;
label var  D_aadt_10000  "AADT(0)-AADT(-1)";

* Changes in sn_d larger than 5 in absolute value at the event year are set to missing ;
gen D_sn_d=sn_d-l.sn_d;
replace D_sn_d=.  if abs(D_sn_d)>5 & time==0;
label var  D_sn_d  "SN(0)-SN(-1)";

gen D_sn_d_2=sn_d-l2.sn_d;
replace D_sn_d_2=.  if abs(D_sn_d_2)>5 & time==0;
label var  D_sn_d_2 "SN(0)-SN(-2)";

gen D_sn_d_1=f.sn_d-l.sn_d;
* The trimming rule tests D_sn_d_1 itself. Testing D_sn_d_2 here would also blank out ;
* D_sn_d_1 whenever D_sn_d_2 is missing, because a missing value compares as larger than 5 ;
replace D_sn_d_1=.  if abs(D_sn_d_1)>5 & time==0;
label var  D_sn_d_1 "SN(1)-SN(-1)";


gen D_iri=iri-l.iri;
label var  D_iri "IRI(0)-IRI(-1)" ;


* Change in IRI over the four and three years after the event, trimmed at 30 in absolute value at the event year ;
gen Df4_iri=f4.iri-iri;
replace Df4_iri=. if abs(Df4_iri)>30 & time==0;
label var Df4_iri " IRI(4)-IRI(0)";


gen Df3_iri=f3.iri-iri;
replace Df3_iri=. if abs(Df3_iri)>30 & time==0;
label var Df3_iri " IRI(3)-IRI(0)";



*gen aadt_4yr=f4.aadt_10000+f3.aadt_10000+f2.aadt_10000+f1.aadt_10000 +aadt_10000;
*label var aadt_4yr  "AADT(0)+AADT(1)+...+AADT(4)";

* Traffic summed over the current year and the three following years ;
* Every term names aadt_10000 in full so the sum does not depend on variable-name abbreviation ;
gen aadt_3yr=f3.aadt_10000+f2.aadt_10000+f1.aadt_10000 +aadt_10000;
label var aadt_3yr  "AADT(0)+AADT(1)+...+AADT(3)";


*gen Df4_iri_by_aadt_4yr=Df4_iri/aadt_4yr;
*label var Df4_iri_by_aadt_4yr " IRI/AADT using 4 year totals "; 

* Three-year change in IRI per unit of traffic summed over the same years ;
gen Df3_iri_by_aadt_3yr=Df3_iri/aadt_3yr;
label var Df3_iri_by_aadt_3yr " IRI/AADT using 3 year totals "; 


* Constant equal to 1 on every row ;
gen Surf_all=1 ;



** Identify the events with huge changes in IRI and inspect them; 

* Flag the event year when IRI falls by more than 10 over the next three years, then spread the flag over the event ;
gen out_Diri3yr=cond(Df3_iri<-10 & time==0,1,0);
bys id_event: egen segment_out_Diri3yr=max(out_Diri3yr);

* Flag the event year when the non-missing change in sn_d exceeds 4 in absolute value, then spread it likewise ;
gen out_D_sn_d=cond(abs(D_sn_d)>4 & time==0 &D_sn_d!=.,1,0);
bys id_event (time): egen segment_out_D_sn_d=max(out_D_sn_d);


* Tabulate the sn_d flag against event characteristics at the event year. Each variable is listed once ;
foreach x in next_event_gap D_surface irsstatecode urban event_year multiple_events {;
 tab `x'  out_D_sn_d if time==0, m  row;
};


	* Estimation sample: event time from -5 to 5, no following event within 4 years, ;
	* segments observed after 1994, spells longer than 4 years and surface code 44, labelled Flexible above ;
	keep  if time>-6 &  time<6;
	drop if next_event_gap<=4 | last_year<=1994;
	drop if lenght<=4;
	keep if D_surface==44;
	tab time, gen(Dum_time)	;
	* Factor variables need non-negative values, so event time is shifted by 5 and labelled with the original time ;
	* NOTE(review): labmask and parmest are user-written commands - confirm they are installed ;
	gen D_time=time+5;
	labmask D_time, values(time);
	tab D_time, m;
	
	* Clears the global. It is not referenced again in the visible part of the file ;
	global time_dummies ;
	* Regress each outcome on event-time dummies. ib4 makes D_time equal to 4, that is time -1, the base category ;
	* parmest writes the coefficient table of each regression to a temporary file used for the figure ;
	foreach var in sn_d iri{;
		reg `var' ib4.D_time,  robust cluster(id)  ;
		parmest,format(label estimate  min95 max95 %8.2f p %8.1e) list(,) label saving("temp_`var'_vs_time.dta", replace );

	};
	
	
*Figure ;
			
			* Stack the two coefficient tables saved by parmest and tag each row with its outcome ;
			use "temp_sn_d_vs_time.dta", clear;
			
			* The tag has to be exactly sn_d. After the reshape and the rename below it yields ;
			* the variables sn_d, min95sn_d and max95sn_d that the labels and the graph refer to ;
			gen var= "sn_d";
			
			append using "temp_iri_vs_time.dta";
			
			replace var="iri" if var=="";
			drop if label=="State IRS code";
			* Recover D_time from the parameter name. The base level is written as 4b and the constant turns into missing ;
			gen time=subinstr(parm, ".D_time","", .);
			replace time="4"  if time=="4b";
			destring time, replace force; 

			* min95 and max95 are the confidence limits saved by parmest, named in full here ;
			keep parm time  estimate min95 max95 var;
			drop if time==.;
			* Undo the shift of 5 so that time is event time again ;
			replace time=time-5;
			reshape wide estimate min95 max95,i(time) j(var) string;

			ren estimate* *;
			label var iri "IRI";
			label var sn_d "SN/D";
			label var 	min95iri "95% CI" ;
			label var  max95iri "95% CI" ;
			label var min95sn_d "95% CI" ;
			label var max95sn_d "95% CI"  ;

			tw 
			   (rarea min95iri max95iri time  ,msize(*0.6) color(%30))  
			   (rarea min95sn_d max95sn_d time ,msize(*0.6) yaxis(2)   color(%20))
			   (con iri  time ,msize(*0.6))  
			   (con sn_d time ,msize(*0.6) yaxis(2)  lp(longdash) )
			  , 
			  ytitle("IRI") ytitle( "SN/D",axis(2) )
				   legend( off
				   order(2 4 1)
				   pos(2) col(2) ring(0))
				   xlabel(-5(1)5)
				   xtitle("")
					xline(0, lp(dot) lc(gs8))
					scale(*1.3)
					xmtick(##5)
					ymtick(##4)
				   ;
				   pause ;
				   * NOTE(review): the global named output is not set in the visible part of this file - confirm it is defined before this point ;
				   graph export "${output}/Figures/FigureB2_events.pdf", replace ;
		


	* Remove the temporary coefficient files ;
	erase "temp_sn_d_vs_time.dta";
	erase "temp_iri_vs_time.dta";



   
   
   
